Load generic libraries
source('configuration.r')
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
Load plot specific libraries
library(foreach)
library(readr)
suppressMessages(library(igraph))
library(ggraph)
library(ggalluvial)
Create graph edge data
dat <- read.table('../tables/antibiotics_gene_linkage.tsv', head=FALSE, stringsAsFactors = FALSE) %>%
mutate(id=paste0(V1,':',V2, ':', V3)) %>%
mutate(V6=str_replace(V6, 'PheCmlA5', 'Phe'))
## Warning: `as_dictionary()` is soft-deprecated as of rlang 0.3.0.
## Please use `as_data_pronoun()` instead
## This warning is displayed once per session.
## Warning: `new_overscope()` is soft-deprecated as of rlang 0.2.0.
## Please use `new_data_mask()` instead
## This warning is displayed once per session.
## Warning: The `parent` argument of `new_data_mask()` is deprecated.
## The parent of the data mask is determined from either:
##
## * The `env` argument of `eval_tidy()`
## * Quosure environments when applicable
## This warning is displayed once per session.
## Warning: `overscope_clean()` is soft-deprecated as of rlang 0.2.0.
## This warning is displayed once per session.
edges.full <- foreach(id=unique(dat$id), .combine=rbind) %do% {
tmp <- dat[dat$id == id, ]
if(nrow(tmp) < 2) {
NULL
}else{
edge <- data.frame(t(combn(sort(tmp$V6), 2)), id=id)
edge$dist <- foreach(r=1:nrow(edge), .combine = c) %do% {
abs(rowMeans(tmp[tmp$V6 == edge[r, ]$X1, 4:5]) - rowMeans(tmp[tmp$V6 == edge[r, ]$X2, 4:5]))
}
edge
}
}
## save for ad hoc analysis
write.table(edges.full, '../output_tables/ar_gene_linkage_edge_list_uncollapsed.tsv', row.names = F, quote=F, sep='\t')
Function for ploting
get_graph_data <- function(pattern='', reverse=FALSE){
if(reverse){
edges.collapsed <- filter(edges.full, !str_detect(id, pattern))
}else{
edges.collapsed <- filter(edges.full, str_detect(id, pattern))
}
## filtering and collapse into edge weights
edges.collapsed <- group_by(edges.collapsed, X1, X2) %>%
tally() %>%
filter(n>2) ### keep edges with more than 2 support
## Run MCL graph clustering
write.table(edges.collapsed, 'tmp_edges.tsv', quote=F, sep='\t', col.names = F, row.names = F)
system('/mnt/software/bin/mcl tmp_edges.tsv --abc -o tmp_mcl.tsv -overlap keep -I 3 2>/dev/null')
mcl <- read_tsv('tmp_mcl.tsv', col_names = F)
grp <- foreach(c=1:nrow(mcl), .combine = 'rbind') %do%{
tmp <- as.character(na.exclude(as.character(mcl[c,])))
data.frame(vertex=tmp, grp=c, stringsAsFactors = F)
} %>% data.frame(row.names = 1)
## generate graph data
g <- graph_from_data_frame(edges.collapsed, directed=FALSE)
## vertices
colors <- pal_simpsons('springfield')(16)
n.colors <- colors[as.numeric(as.factor(str_replace(V(g)$name, '.*_', '')))]
V(g)$color <- n.colors
V(g)$community <- grp[V(g)$name,]##cluster_walktrap(g)$membership
V(g)$Type <- str_replace(V(g)$name, '.*_', '')
## Edges
E(g)$width <- edges.collapsed$n
E(g)$community <-
apply(as.data.frame(get.edgelist(g, names=FALSE)), 1,
function(x)
ifelse(V(g)$community[x[1]] == V(g)$community[x[2]],
LETTERS[V(g)$community[x[1]]], NA))
## format vertex names
aux <- function(x){
if(length(x) > 2){
paste0(x[1],'_',x[2])
}else{
x[1]
}
}
V(g)$name <- sapply(str_split(V(g)$name, '_'), aux)
#### carbapenemase
args <- read.table('../metadata/Bla_Carb.dat', head=F, stringsAsFactors=F)[,1]
carb <- setNames(rep('No', length(V(g)$name)),V(g)$name)
carb[names(carb) %in% args] <- 'Yes'
V(g)$carb <- carb
g
}
Main figure of all genes
g <- get_graph_data()
g_cols <- c('burlywood', pal_npg('nrc')(10)[2:7], 'purple', 'blue', 'darkgreen', 'pink', 'red', 'gold', 'black')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/fig3a.ar_gene_linkage_graph.pdf', p, width=16, height=16)
Format data
edge.dat <- data.frame(v=V(g)$name, g=LETTERS[V(g)$community])
genome.info <- read.table('../tables/genome_info.dat', sep = '\t')
merge(dat, genome.info, by=c(1,2)) %>% ## only take the high/medium quality ones
select(Species=V1, gene=V6.x) %>% mutate(gene=str_replace(gene, '_.*','')) %>%
filter(Species!='plasmid') -> sp.dat
sankey.dat <- merge(sp.dat, edge.dat, by.x=2, by.y=1) %>%
mutate(Species=str_replace(Species,'_',' ')) %>%
group_by(Species, g) %>%
count %>%
mutate(Antibiotics_cluster=as.character(g)) %>%
filter(n>2)
Plot
p <- ggplot(sankey.dat,aes(y=log10(n), axis1=Species, axis2=Antibiotics_cluster)) +
geom_alluvium(aes(fill=Antibiotics_cluster), width=1/5) +
geom_stratum(width=1/5, fill=NA) +
geom_text(stat = "stratum", label.strata = TRUE, size=6, fontface = "italic", nudge_x=-0.12, hjust=1) +
scale_fill_manual(values=g_cols) +
scale_x_discrete(limits = c("Species", "Antibiotics Cluster"), expand = c(.5, .05)) +
theme_void() +
theme()
p
ggsave('../plots/fig3b.ar_gene_linkage_sankey.pdf', p, width=18, height=18)
Staphylococcus aureus network
g <- get_graph_data('Staphylococcus_aureus')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup11.staph_aureus_ar_gene_linkage_graph.pdf', p, width=5, height=5)
Genome network
g <- get_graph_data('plasmid', reverse = TRUE)
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup12.genome_ar_gene_linkage_graph.pdf', p, width=16, height=16)
Plasmid network
g <- get_graph_data('plasmid')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup12.plasmid_ar_gene_linkage_graph.pdf', p, width=10, height=7)
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] bindrcpp_0.2.2 ggalluvial_0.9.1 ggraph_1.0.2 igraph_1.2.2
## [5] readr_1.1.1 foreach_1.4.4 ggsci_2.9 reshape2_1.4.3
## [9] stringr_1.3.0 tibble_2.0.1 tidyr_0.8.0 dplyr_0.7.4
## [13] gridExtra_2.3 ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.4 purrr_0.2.4 colorspace_1.3-2
## [4] htmltools_0.3.6 viridisLite_0.3.0 yaml_2.1.18
## [7] utf8_1.1.4 rlang_0.3.1 pillar_1.3.1
## [10] glue_1.2.0 withr_2.1.2 tweenr_1.0.0
## [13] bindr_0.1.1 plyr_1.8.4 munsell_0.5.0
## [16] gtable_0.2.0 codetools_0.2-15 evaluate_0.10.1
## [19] labeling_0.3 knitr_1.20 fansi_0.4.0
## [22] Rcpp_0.12.18 scales_1.0.0 backports_1.1.2
## [25] farver_1.0 ggforce_0.1.3 hms_0.4.2
## [28] digest_0.6.18 stringi_1.2.2 ggrepel_0.8.0
## [31] rprojroot_1.3-2 cli_1.0.1 tools_3.4.4
## [34] magrittr_1.5 lazyeval_0.2.1 crayon_1.3.4
## [37] pkgconfig_2.0.2 MASS_7.3-49 assertthat_0.2.0
## [40] rmarkdown_1.9 iterators_1.0.9 viridis_0.5.1
## [43] R6_2.2.2 units_0.6-1 compiler_3.4.4